###################################################################################################
# Analyze Relationship Between Congressional Roll Call Voting and Region ##########################
# Michael Olson and James Snyder ##################################################################
###################################################################################################

###################################################################################################
# load and clean various datasets #################################################################
###################################################################################################

# main data ################

# load data for early and late periods, label them as such, and combine

  prc10 <- read.csv("./better_prohibition_rollcall_agdat_1910.csv")
  prc10$era <- "early"; prc10$era_num <- 1
  prc10$Region <- as.factor(ifelse(prc10$south==1,"South","North"))
  prc30 <- read.csv("./better_prohibition_rollcall_agdat_1930.csv")
  prc30$era <- "late"; prc30$era_num <- 2
  prc30$Region <- as.factor(ifelse(prc30$south==1,"South","North"))
  
  prc   <- rbind(prc10,prc30)
  prc$distid <- paste(prc$distid,prc$era,sep="_")
  prc$state <- as.character(prc$state)
  
  # limit to not-at-large districts
  
  prc <- prc[prc$at_large==0,]
  
  # add terciles
  
  prc$terciles <- factor(ifelse(prc$district_preference<=quantile(prc$district_preference,0.33)[1],1,
                                ifelse(prc$district_preference>=quantile(prc$district_preference,0.66)[1],3,2)))
  
  # create a "solid north" variable based on state legislative seat shares
  # from Dubin (2006)
  
  dubin <- read.dta("./seat_numbers.dta")
  dubin_early <- dubin[dubin$year<=1920 & dubin$year>=1910,]
  dubin_early <- as.data.frame(dubin_early %>% group_by(state) %>% summarise(n_dem = sum(lhd+uhd,na.rm=TRUE),
                                                                             n_rep = sum(lhr+uhr,na.rm=TRUE),
                                                                             n_tot = sum(lht+uht,na.rm=TRUE)))  # checked this--using sum doesn't make a difference
  dubin_early$rep_share <- dubin_early$n_rep/dubin_early$n_tot
  dubin_early$dem_share <- dubin_early$n_dem/dubin_early$n_tot
  dubin_early$south <- ifelse(is.element(dubin_early$state,c("AL","AR","FL","GA","LA","MS","NC","SC","TN","TX","VA")),1,0)
  
  prc$solid_early <- ifelse(is.element(prc$state,dubin_early$state[(dubin_early$rep_share>=0.7 | dubin_early$dem_share>=0.7) & dubin_early$south==0]) & 
                             prc$era=="early",1,0)
  
  dubin_late <- dubin[dubin$year<=1935 & dubin$year>=1928,]
  dubin_late <- as.data.frame(dubin_late %>% group_by(state) %>% summarise(n_dem = sum(lhd+uhd,na.rm=TRUE),
                                                                           n_rep = sum(lhr+uhr,na.rm=TRUE),
                                                                           n_tot = sum(lht+uht,na.rm=TRUE)))  # checked this--using sum doesn't make a difference
  dubin_late$rep_share <- dubin_late$n_rep/dubin_late$n_tot
  dubin_late$dem_share <- dubin_late$n_dem/dubin_late$n_tot
  dubin_late$south <- ifelse(is.element(dubin_late$state,c("AL","AR","FL","GA","LA","MS","NC","SC","TN","TX","VA")),1,0)
  
  prc$solid_late <- ifelse(is.element(prc$state,dubin_late$state[(dubin_late$rep_share>=0.7 | dubin_late$dem_share>=0.7) & dubin_late$south==0]) & 
                              prc$era=="late",1,0)
  
  prc$solid_north <- ifelse((prc$era=="early" & prc$south==0 & prc$solid_early==1) |
                              (prc$era=="late" & prc$south==0 & prc$solid_late==1)
                              ,1,0)
  
  prc$rowid <- 1:nrow(prc)
  
# alternative data ################
  
# legislator-level data
# cleaning is very similar to the main data (but no solid north variable)
  
  prcleg10 <- read.csv("./better_prohibition_rollcall_agdat_legislator_1910.csv")
  prcleg10$era <- "early"; prcleg10$era_num <- 1
  prcleg10$Region <- as.factor(prcleg10$south)
  prcleg30 <- read.csv("./better_prohibition_rollcall_agdat_legislator_1930.csv")
  prcleg30$era <- "late"; prcleg30$era_num <- 2
  prcleg30$Region <- as.factor(prcleg30$south)
  
  prcleg   <- rbind(prcleg10,prcleg30)
  prcleg$distid <- paste(prcleg$distid,prcleg$era,sep="_")
  
  prcleg <- prcleg[is.element(prcleg$party,c(100,200)),]  # there are really few folks with different/multiple party affiliations
  
  prcleg$dem <- ifelse(prcleg$party==100,1,0)
  
  prcleg <- prcleg[prcleg$at_large==0,]
  
  prcleg$rowid <- 1:nrow(prcleg)
  
# pooled over-time data
  
  prcpooled <- read.csv("./better_prohibition_rollcall_agdat.csv")
  
  prcpooled <- prcpooled[prcpooled$at_large==0,]
  
  prcpooled$rowid <- 1:nrow(prcpooled)
  
# mixed period data
  
# we don't actually use this for analysis, since it still averages over the life of a district
# and so sort of undercuts the value of only retaining pre-vote referendums in the preference measure.
# instead, we use this data to create matched districts, and use these matched districts in a 
# vote-level analysis.
  
  prc10_mix <- read.csv("./better_prohibition_rollcall_agdat_1910_timing.csv")
  prc10_mix$era <- "old"; prc10_mix$era_num <- 1
  prc30_mix <- read.csv("./better_prohibition_rollcall_agdat_1930_timing.csv")
  prc30_mix$era <- "new"; prc30_mix$era_num <- 2
  
  prc_mixed <- rbind(prc10_mix,prc30_mix)
  prc_mixed$distid <- paste(prc_mixed$distid,prc_mixed$era,sep="_")
  prc_mixed$state <- as.character(prc_mixed$state)  
  
  prc_mixed <- prc_mixed[prc_mixed$at_large==0,]
  
  prc_mixed$rowid <- 1:nrow(prc_mixed)
  
# matching ################
 
# now we create a matched dataset for each of the above 
  
  set.seed(02138)  
  
# main data #
  
  prc_matcher <- prc  # create duplicate of main data
  
  # mahalanobis distance matching

  south_match_out <- Match(Tr=prc_matcher$south,
                           X=cbind(prc_matcher$district_preference,prc_matcher$era_num),
                           exact=c(F,T),    # we nearest-neighbor match, within era
                           caliper=0.1,Weight=2,  # mahalanobis distance
                           distance.tolerance = 1e-100000000,
                           ties=F)               # break ties randomly
  
  # extract matched districts
  
  included_dists <- prc_matcher$distid[c(south_match_out$index.control,south_match_out$index.treated)]

  # now loop over the included districts
  # we do this because there are potential duplicates in the matched districts, and we want to include them 
  # the correct number of times
  
  list_binder <- list()
  
  for(j in 1:length(included_dists)){
    
    list_binder[[j]] <- prc[prc$distid==included_dists[j],]

  }

  matched_prc <- do.call(rbind,list_binder)
  matched_prc$temp_outcome <- matched_prc$mean_pro_choice
  matched_prc$rowid <- 1:nrow(matched_prc)
  
  # we use this same set of districts to create a matched version of the legislator-level data
  
  list_binder_leg <- list()
  
  for(j in 1:length(included_dists)){
    
    list_binder_leg[[j]] <- prcleg[prcleg$distid==included_dists[j],]
    
  }
  
  matched_prcleg <- do.call(rbind,list_binder_leg)
  matched_prcleg$temp_outcome <- matched_prcleg$mean_pro_choice
  matched_prcleg$rowid <- 1:nrow(matched_prcleg)
  
# pooled data #
  
  south_pooled_match_out <- Match(Tr=prcpooled$south,
                           X=cbind(prcpooled$district_preference),
                           exact=c(F),  # here there's no time era to think about
                           caliper=0.1,Weight=2,
                           distance.tolerance = 1e-10000,
                           ties=F)
  
  included_pooled_dists <- prcpooled$distid[c(south_pooled_match_out$index.control,south_pooled_match_out$index.treated)]
  
  list_pooled_binder <- list()
  
  for(j in 1:length(included_pooled_dists)){
    
    list_pooled_binder[[j]] <- prcpooled[prcpooled$distid==included_pooled_dists[j],]
    
  }
  
  matched_pooled_prc <- do.call(rbind,list_pooled_binder)
  matched_pooled_prc$temp_outcome <- matched_pooled_prc$mean_pro_choice
  matched_pooled_prc$rowid <- 1:nrow(matched_pooled_prc)
  
# mixed data #
  
  prc_mix_matcher <- prc_mixed
  
  south_mix_match_out <- Match(Tr=prc_mix_matcher$south,
                           X=cbind(prc_mix_matcher$district_preference,prc_mix_matcher$era_num),
                           exact=c(F,T),
                           caliper=0.1,Weight=2,
                           distance.tolerance = 1e-100000000,
                           ties=F)
  
  included_mix_dists <- prc_mix_matcher$distid[c(south_mix_match_out$index.control,south_mix_match_out$index.treated)]
  
    # we'll use these a little later
  
# "no local option" data
  
  prc_matcher_nlo <- prc[!is.na(prc$dp_nlo),]
  
  south_match_out_nlo <- Match(Tr=prc_matcher_nlo$south,
                           X=cbind(prc_matcher_nlo$dp_nlo,prc_matcher_nlo$era_num),
                           exact=c(F,T),
                           caliper=0.1,Weight=2,
                           distance.tolerance = 1e-100000000,
                           ties=F)
  
  # extract matched districts
  
  included_dists_nlo <- prc_matcher_nlo$distid[c(south_match_out_nlo$index.control,south_match_out_nlo$index.treated)]

  list_binder_nlo <- list()
  
  for(j in 1:length(included_dists_nlo)){
    
    list_binder_nlo[[j]] <- prc[prc$distid==included_dists_nlo[j],]
    
  }
  
  matched_prc_nlo <- do.call(rbind,list_binder_nlo)
  matched_prc_nlo$temp_outcome <- matched_prc_nlo$mean_pro_choice
  matched_prc_nlo$rowid <- 1:nrow(matched_prc_nlo)
  
###################################################################################################
# DESCRIPTIVE PLOTS AND STATISTICS ################################################################
###################################################################################################   
  
# distribution of preferences
  
  cairo_ps("./results/cd_preference_histogram.eps",width=6,height=3.5,fallback_resolution = 1200)
  pref_hist <- ggplot(data=prc,aes(x=district_preference,group=Region,fill=Region,colour=Region))+
    geom_density(size=0.75,alpha=0.3)+
    xlim(0,1)+
    xlab("Share District Pro-Prohibition")+
    ylab("Density")+
    scale_color_manual(values=c("black","grey50"))+
    scale_fill_manual(values=c("black","grey50"))+
    theme_minimal()
  print(pref_hist)
  dev.off()
  
# distribution of preferences, 1910
  
    cairo_ps("./results/cd_preference_histogram_1910.eps",width=3.25,height=2,fallback_resolution = 1200)
    pref_hist <- ggplot(data=prc10,aes(x=district_preference,group=Region,fill=Region,colour=Region))+
      geom_density(size=0.75,alpha=0.3)+
      xlim(0,1)+
      xlab("Share District Pro-Prohibition")+
      ylab("Density")+
      scale_color_manual(values=c("black","grey50"))+
      scale_fill_manual(values=c("black","grey50"))+
      theme_minimal()
    print(pref_hist)
    dev.off()
    
# distribution of preferences, 1930
    
    cairo_ps("./results/cd_preference_histogram_1930.eps",width=3.25,height=2,fallback_resolution = 1200)
    pref_hist <- ggplot(data=prc30,aes(x=district_preference,group=Region,fill=Region,colour=Region))+
      geom_density(size=0.75,alpha=0.3)+
      xlim(0,1)+
      xlab("Share District Pro-Prohibition")+
      ylab("Density")+
      scale_color_manual(values=c("black","grey50"))+
      scale_fill_manual(values=c("black","grey50"))+
      theme_minimal()
    print(pref_hist)
    dev.off()
    
# distribution of preferences, matched sample
    
    cairo_ps("./results/cd_preference_histogram_matched.eps",width=3.25,height=2,fallback_resolution = 1200)
    pref_hist <- ggplot(data=matched_prc,aes(x=district_preference,group=Region,fill=Region,colour=Region))+
      geom_density(size=0.75,alpha=0.3)+
      xlim(0,1)+
      xlab("Share District Pro-Prohibition")+
      ylab("Density")+
      scale_color_manual(values=c("black","grey50"))+
      scale_fill_manual(values=c("black","grey50"))+
      theme_minimal()
    print(pref_hist)
    dev.off()
  
# distribution of roll call vote scores
    
    cairo_ps("./results/cd_vote_histogram.eps",width=6,height=3.5,fallback_resolution = 1200)
    vote_hist <- ggplot(data=prc,aes(x=mean_pro_choice,group=Region,fill=Region,colour=Region))+
      geom_density(size=0.75,alpha=0.3)+
      xlim(0,1)+
      xlab("Share Roll Calls Pro-Prohibition")+
      ylab("Density")+
      scale_color_manual(values=c("black","grey50"))+
      scale_fill_manual(values=c("black","grey50"))+
      theme_minimal()
    print(vote_hist)
    dev.off()
  
# summary statistics table
  
  sumstat_df <- prc[,c("mean_pro_choice","district_preference","n_votes","south")]
  
  sumstats_sg <- stargazer(sumstat_df,summary=TRUE,
                           title="Summary Statistics, Congressional Roll Call Voting",
                           label="sumstats",
                           covariate.labels = c("Prohibition Roll Call Score",
                                                "District Preference on Prohibition",
                                                "Number of Prohibition Votes",
                                                "South"),
                           summary.stat=c("mean","median","sd","min","max","n"),
                           out="./results/sumstats.tex")
  
# summary statistics table, matched
  
  matched_sumstat_df <- matched_prc[,c("mean_pro_choice","district_preference","n_votes","south")]
  
  sumstats_sg <- stargazer(matched_sumstat_df,summary=TRUE,
                           title="Summary Statistics, Congressional Roll Call Voting, Matched Sample",
                           label="sumstats_matched",
                           covariate.labels = c("Prohibition Roll Call Score",
                                                "District Preference on Prohibition",
                                                "Number of Prohibition Votes",
                                                "South"),
                           summary.stat=c("mean","median","sd","min","max","n"),
                           out="./results/sumstats_matched.tex")
  
###################################################################################################
# ANALYSIS ########################################################################################
###################################################################################################   
  
  # note: we use heteroskedasticity-robust standard errors for all analyses. to do so, and to 
  # simplify the code, we "cluster" on a unique row id for each regression. this simply facilitates
  # the felm() function attaching the robust standard error directly to the regression object,
  # meaning that we don't need to manually give the robust ses to stargazer to report results.
  # this is why we appear to be "clustering" our inference.
  
# REGIONAL HETEROGENEITY ANALYSIS PLOT

  cairo_ps("./results/region_scatter.eps",width=6,height=3.5,fallback_resolution = 1200)
  twoway_scatter <- ggplot(data=prc,aes(x=district_preference,y=mean_pro_choice,group=Region,colour=Region))+
    geom_point(size=1,alpha=0.25)+
    geom_smooth(se=FALSE,method="loess",size=3)+
    geom_vline(xintercept=0.5,size=2,linetype=2,colour="black")+
    scale_color_manual(values=c("black","grey50"))+
    scale_fill_manual(values=c("black","grey50"))+
    xlab("Share District Pro-Prohibition")+
    ylab("Share Roll Calls Pro-Prohibition")+
    theme_minimal()
  print(twoway_scatter)
  dev.off()
  
# Aggregate Score Regressions
  
  int   <- felm(mean_pro_choice~district_preference*south|0|0|rowid,data=prc)
  
  int_weights <- felm(mean_pro_choice~district_preference*south|0|0|rowid,data=prc,weights=prc$n_votes)
  
  int_matched   <- felm(temp_outcome~district_preference*south|0|0|rowid,data=matched_prc)
  
  int_weights_matched <- felm(temp_outcome~district_preference*south|0|0|rowid,data=matched_prc,weights=matched_prc$n_votes)
  
  bivariate_sg <- stargazer(int,int_weights,int_matched,int_weights_matched,
                            notes.append = FALSE,notes.label = "",
                            notes="\\parbox[t]{0.75\\textwidth}{\\footnotesize \\textit{Note}: Entries are linear regression coefficients with 
                            robust standard errors in parentheses. Weighted models are weighted according to the number of votes 
                            cast by each district's representatives.
                            $^{*}$p$<$0.05 (two-tailed test).}",
                            add.lines = list(c("Weights","","\\checkmark","","\\checkmark")),
                            keep.stat = c("adj.rsq","n"),
                            star.char=c("*"),star.cutoffs = c(0.05),digits=3,digits.extra=0,
                            table.placement = "!ht",no.space=T,
                            dep.var.caption = "Share Votes Pro-Prohibition",
                            dep.var.labels = c("Unmatched","Matched"),
                            covariate.labels = c("District Preference","South","District Preference $\\times$ South","Constant"),
                            label="region_bivariate",
                            table.layout ="-ld-#-t-as-n",
                            title="Constituent Preferences and Congressional Roll Call Voting on Prohibition")
  
  cat(bivariate_sg, sep = '\n', file = "./results/region_bivariate.tex")
  
# ROBUSTNESS CHECKS #
  
# Drop Sub-County Districts
  
  notrouble_int   <- felm(mean_pro_choice~district_preference*south|0|0|rowid,data=prc[prc$trouble_dist==0,])
  
  notrouble_int_weights <- felm(mean_pro_choice~district_preference*south|0|0|rowid,data=prc[prc$trouble_dist==0,],weights=prc[prc$trouble_dist==0,]$n_votes)
  
  notrouble_int_matched   <- felm(temp_outcome~district_preference*south|0|0|rowid,data=matched_prc[matched_prc$trouble_dist==0,])
  
  notrouble_int_weights_matched <- felm(temp_outcome~district_preference*south|0|0|rowid,data=matched_prc[matched_prc$trouble_dist==0,],weights=matched_prc[matched_prc$trouble_dist==0,]$n_votes)

  trouble_sg <- stargazer(notrouble_int,notrouble_int_weights,notrouble_int_matched,notrouble_int_weights_matched,
                            notes.append = FALSE,notes.label = "",
                            notes="\\parbox[t]{0.71\\textwidth}{\\footnotesize \\textit{Note}: Entries are linear regression coefficients with 
                        robust standard errors in parentheses. Weighted models are weighted according to the number of votes 
                        cast by each district's representatives. Matching is based on full sample, with sub-county districts. $^{*}$p$<$0.05 (two-tailed test).}",
                            add.lines = list(c("Weights","","\\checkmark","","\\checkmark")),
                            keep.stat = c("adj.rsq","n"),
                            star.char=c("*"),star.cutoffs = c(0.05),digits=3,digits.extra=0,
                            no.space=T,
                            table.placement = "!ht",
                            dep.var.caption = "Share Votes Pro-Prohibition",
                            dep.var.labels = c("Unmatched","Matched"),
                            covariate.labels = c("District Preference","South","District Preference $\\times$ South","Constant"),
                            label="trouble",
                            table.layout ="-ld-#-t-as-n",
                            title="Congressional Roll Call Voting, No Sub-County Districts")
  
  cat(trouble_sg, sep = '\n', file = "./results/trouble.tex")
  
# Legislator-Level Regressions

  leg_int   <- felm(mean_pro_choice~district_preference*south|0|0|rowid,data=prcleg)
  
  leg_int_weights <- felm(mean_pro_choice~district_preference*south|0|0|rowid,data=prcleg,weights=prcleg$n_votes)
  
  leg_int_matched   <- felm(temp_outcome~district_preference*south|0|0|rowid,data=matched_prcleg)
  
  leg_int_weights_matched <- felm(temp_outcome~district_preference*south|0|0|rowid,data=matched_prcleg,weights=matched_prcleg$n_votes)
  
  leg_sg <- stargazer(leg_int,leg_int_weights,leg_int_matched,leg_int_weights_matched,
                          notes.append = FALSE,notes.label = "",
                          notes="\\parbox[t]{0.75\\textwidth}{\\footnotesize \\textit{Note}: Entries are linear regression coefficients with 
                          standard errors clustered by district in parentheses. Weighted models are weighted according to the number of votes 
                          cast by each legislator. Matching is conducted using district-level data. $^{*}$p$<$0.05 (two-tailed test).}",
                          add.lines = list(c("Weights","","\\checkmark","","\\checkmark")),
                          keep.stat = c("adj.rsq","n"),
                          star.char=c("*"),star.cutoffs = c(0.05),digits=3,digits.extra=0,
                          table.placement = "!ht",
                          dep.var.caption = "Share Votes Pro-Prohibition",no.space=T,
                          dep.var.labels = c("Unmatched","Matched"),
                          covariate.labels = c("District Preference","South","District Preference $\\times$ South","Constant"),
                          label="legislator",
                          table.layout ="-ld-#-t-as-n",
                          title="Congressional Roll Call Voting, Legislator-District-Level Data")
  
  cat(leg_sg, sep = '\n', file = "./results/legislator.tex")
  
# Party Regressions
  
  leg_party   <- felm(mean_pro_choice~district_preference*south*I(party==100)|0|0|rowid,data=prcleg)
  
  cairo_ps("./results/partysplit.eps",width=6,height=3.5,fallback_resolution = 1200)
  print(ggplot(data=prcleg,aes(x=district_preference,y=mean_pro_choice))+
    geom_jitter(size=1,width=0,height=0.025)+
    geom_abline(size=2.5,aes(slope=coef(leg_party)["district_preference"],
                intercept=coef(leg_party)["(Intercept)"],
                colour="Northern Republican"))+
    geom_abline(size=2.5,aes(slope=coef(leg_party)["district_preference"]+coef(leg_party)["district_preference:I(party == 100)TRUE"],
                intercept=coef(leg_party)["(Intercept)"]+coef(leg_party)["I(party == 100)TRUE"],
                colour="Northern Democrat"))+
    geom_abline(size=2.5,aes(slope=sum(coef(leg_party)[grep("district_preference",names(coef(leg_party)))]),
                intercept=coef(leg_party)["(Intercept)"]+coef(leg_party)["I(party == 100)TRUE"]+coef(leg_party)["south"]+coef(leg_party)["south:I(party == 100)TRUE"],
                colour="Southern Democrat"))+
    scale_colour_manual("",
                        values = c("Northern Republican"="black",
                                   "Northern Democrat"="grey70",
                                   "Southern Democrat"="grey40")) +
     scale_y_continuous("Congressional Roll Call Score", limits = c(-0.05,1.05)) + 
     xlab("Share District Pro-Prohibition")+
     ylab("Share Roll Calls Pro-Prohibition")+
    theme_minimal())
  dev.off()
  
  leg_party_int   <- felm(mean_pro_choice~district_preference*south+I(party==100)|0|0|rowid,data=prcleg)
  
  leg_party_int_weights <- felm(mean_pro_choice~district_preference*south+I(party==100)|0|0|rowid,data=prcleg,weights=prcleg$n_votes)
  
  leg_party_int2 <- leg_party 
  
  leg_party_int_matched   <- felm(temp_outcome~district_preference*south+I(party==100)|0|0|rowid,data=matched_prcleg)
  
  leg_party_int_weights_matched  <- felm(temp_outcome~district_preference*south+I(party==100)|0|0|rowid,data=matched_prcleg,weights=matched_prcleg$n_votes)
  
  leg_party_int2_matched  <- felm(temp_outcome~district_preference*south*I(party==100)|0|0|rowid,data=matched_prcleg)
  
  leg_party_sg <- stargazer(leg_party_int,leg_party_int_weights,leg_party_int2,
                            leg_party_int_matched,leg_party_int_weights_matched,leg_party_int2_matched,
                            notes.append = FALSE,notes.label = "",
                            notes="\\parbox[t]{\\textwidth}{\\footnotesize \\textit{Note}: Entries are linear regression coefficients with 
                                    standard errors clustered by district in parentheses. Weighted models are weighted according to the number of votes 
                                    cast by each legislator. Matching is based on district-level data. $^{*}$p$<$0.05 (two-tailed test).}",
                            add.lines = list(c("Weights","","\\checkmark","","","\\checkmark","")),
                            keep.stat = c("adj.rsq","n"),
                            star.char=c("*"),star.cutoffs = c(0.05),digits=3,digits.extra=0,
                            table.placement = "!ht",no.space=T,
                            dep.var.caption = "Share Votes Pro-Prohibition",
                            dep.var.labels = c("Unmatched","Matched"),
                            covariate.labels = c("District Preference","South","Democrat","Preference $\\times$ South",
                                                 "Preference $\\times$ Democrat","South $\\times$ Democrat",
                                                 "Preference $\\times$ South $\\times$ Dem.",
                                                 "Constant"),
                            label="legislator_party",
                            table.layout ="-ld-#-t-as-n",
                            title="Congressional Roll Call Voting, Controlling for Party")
  
  cat(leg_party_sg, sep = '\n', file = "./results/legislator_party.tex")
  
  
# leave one state out
  
  prc <- dplyr::arrange(prc,desc(south),state,distid)
  
  state_out <- list()
  
  for(j in 1:length(unique(prc$state))){
    
    bit <- prc[prc$state!=unique(prc$state)[j],]
    
    reg   <- felm(mean_pro_choice~district_preference*south|0|0|rowid,data=bit)
    
    state_out[[j]] <- c(unique(prc$state)[j],
                        coef(reg)["district_preference:south"],
                        sqrt(reg$clustervcv["district_preference:south","district_preference:south"]))
    
  }
  
  state_out <- as.data.frame(do.call(rbind,state_out))
  colnames(state_out) <- c("state","coef","se")
  state_out$coef <- as.numeric(as.character(state_out$coef)); state_out$se <- as.numeric(as.character(state_out$se))
  state_out$south <- 0; state_out[is.element(state_out$state,
                                             c("AR","AL","GA","FL","NC","SC","TX","VA","TN","LA","MS")),"south"] <- 1
  state_out <- dplyr::arrange(state_out,desc(south),state)
  state_out$Region <- as.factor(ifelse(state_out$south==1,"South","North"))
  
  base_reg   <- felm(mean_pro_choice~district_preference*south|0|0|distid,data=prc)
  
  state_out$state <- factor(state_out$state,
                            levels=c("AL","AR","CA","CO","CT",
                                     "FL","GA","ID","IL","IN",
                                     "IA","KY","LA","ME","MD",
                                     "MA","MI","MN","MS","MO",
                                     "MT","NE","NJ","NY","NC",
                                     "ND","OH","OK","OR","PA",
                                     "SC","SD","TX","UT","VA",
                                     "VT","WA","WV","WI"))
  
  cairo_ps("./results/omitted_state.eps",width=7,height=3.5,fallback_resolution = 1200)
  omit_state_plot <- ggplot(data=state_out,aes(x=state,y=coef,colour=Region))+
    geom_point(size=2)+
    geom_errorbar(aes(ymax=coef+1.96*se,ymin=coef-1.96*se),size=1.25)+
    scale_color_manual(values=c("black","grey70"))+
    scale_fill_manual(values=c("black","grey70"))+
    geom_hline(yintercept=coef(base_reg)["district_preference:south"],
               size=2,colour="indianred",linetype=2)+
    ylab("Coefficient on 'District Preference X South'")+
    xlab("Omitted State")+
    theme_minimal()+
    theme(axis.text=element_text(size=6))
  print(omit_state_plot)
  dev.off()
  
# Different Eras Regressions
  
  early   <- felm(mean_pro_choice~district_preference*south|0|0|rowid,data=prc[prc$era=="early",])
  
  early_weights <- felm(mean_pro_choice~district_preference*south|0|0|rowid,data=prc[prc$era=="early",],weights=prc$n_votes[prc$era=="early"])
  
  early_matched   <- felm(temp_outcome~district_preference*south|0|0|rowid,data=matched_prc[matched_prc$era=="early",])
  
  early_weights_matched <- felm(temp_outcome~district_preference*south|0|0|rowid,data=matched_prc[matched_prc$era=="early",],
                              weights=matched_prc$n_votes[matched_prc$era=="early"])
  
  early_sg <- stargazer(early,early_weights,early_matched,early_weights_matched,
                            notes.append = FALSE,notes.label = "",
                            notes="\\parbox[t]{0.725\\textwidth}{\\footnotesize \\textit{Note}: Entries are linear regression coefficients with 
                            robust standard errors in parentheses. Weighted models are weighted according to the number of votes 
                            cast by each district's representatives. $^{*}$p$<$0.05 (two-tailed test).}",
                            add.lines = list(c("Weights","","\\checkmark","","\\checkmark")),
                            keep.stat = c("adj.rsq","n"),
                            star.char=c("*"),star.cutoffs = c(0.05),digits=3,digits.extra=0,
                            table.placement = "!ht",
                            dep.var.caption = "Share Votes Pro-Prohibition",no.space=T,
                            dep.var.labels = c("Unmatched","Matched"),
                            covariate.labels = c("District Preference","South","District Preference $\\times$ South","Constant"),
                            label="region_early",
                            table.layout ="-ld-#-t-as-n",
                            title="Congressional Roll Call Voting, 1910-1920")
  
  cat(early_sg, sep = '\n', file = "./results/early_bivariate.tex")
  
  
  late   <- felm(mean_pro_choice~district_preference*south|0|0|rowid,data=prc[prc$era=="late",])
  
  late_weights <- felm(mean_pro_choice~district_preference*south|0|0|rowid,data=prc[prc$era=="late",],weights=prc$n_votes[prc$era=="late"])
  
  late_matched   <- felm(temp_outcome~district_preference*south|0|0|rowid,data=matched_prc[matched_prc$era=="late",])
  
  late_weights_matched <- felm(temp_outcome~district_preference*south|0|0|rowid,data=matched_prc[matched_prc$era=="late",],
                                weights=matched_prc$n_votes[matched_prc$era=="late"])
  
  late_sg <- stargazer(late,late_weights,late_matched,late_weights_matched,
                        notes.append = FALSE,notes.label = "",
                        notes="\\parbox[t]{0.725\\textwidth}{\\footnotesize \\textit{Note}: Entries are linear regression coefficients with 
                        robust standard errors in parentheses. Weighted models are weighted according to the number of votes 
                        cast by each district's representatives. $^{*}$p$<$0.05 (two-tailed test).}",
                        add.lines = list(c("Weights","","\\checkmark","","\\checkmark")),
                        keep.stat = c("adj.rsq","n"),
                        star.char=c("*"),star.cutoffs = c(0.05),digits=3,digits.extra=0,
                        table.placement = "!ht",
                        dep.var.caption = "Share Votes Pro-Prohibition",
                        no.space=T,
                        dep.var.labels = c("Unmatched","Matched"),
                        covariate.labels = c("District Preference","South","District Preference $\\times$ South","Constant"),
                        label="region_late",
                        table.layout ="-ld-#-t-as-n",
                        title="Congressional Roll Call Voting, 1928-1935")
  
  cat(late_sg, sep = '\n', file = "./results/late_bivariate.tex")
  
# full period (pooled) model  
  
  # unmatched
  
  pooled   <- felm(mean_pro_choice~district_preference*south|0|0|rowid,data=prcpooled)
  
  pooled_weights <- felm(mean_pro_choice~district_preference*south|0|0|rowid,data=prcpooled,weights=prcpooled$n_votes)
  
  # matched
  
  pooled_matched   <- felm(temp_outcome~district_preference*south|0|0|rowid,data=matched_pooled_prc)
  
  pooled_matched_weights <- felm(temp_outcome~district_preference*south|0|0|rowid,data=matched_pooled_prc,weights=matched_pooled_prc$n_votes)
  
  pooled_sg <- stargazer(pooled,pooled_weights,pooled_matched,pooled_matched_weights,
                       notes.append = FALSE,notes.label = "",
                       notes="\\parbox[t]{0.725\\textwidth}{\\footnotesize \\textit{Note}: Entries are linear regression coefficients with 
                        robust standard errors in parentheses. Weighted models are weighted according to the number of votes 
                        cast by each district's representatives. $^{*}$p$<$0.05 (two-tailed test).}",
                       add.lines = list(c("Weights","","\\checkmark","","\\checkmark")),
                       keep.stat = c("adj.rsq","n"),
                       star.char=c("*"),star.cutoffs = c(0.05),digits=3,digits.extra=0,
                       table.placement = "!ht",
                       dep.var.caption = "Share Votes Pro-Prohibition",
                       dep.var.labels = c("Unmatched","Matched"),
                       covariate.labels = c("District Preference","South","District Preference $\\times$ South","Constant"),
                       label="region_pooled",no.space = T,
                       table.layout ="-ld-#-t-as-n",
                       title="Congressional Roll Call Voting, 1900-1935")
  
  cat(pooled_sg, sep = '\n', file = "./results/pooled_bivariate.tex")
  
# Solid North
  
  int_north   <- felm(mean_pro_choice~district_preference*(south+solid_north)|0|0|rowid,data=prc)
  
  # waldtest(int_north, ~`district_preference:south`-`district_preference:solid_north`)
  
  int_north_weights <- felm(mean_pro_choice~district_preference*(south+solid_north)|0|0|rowid,data=prc,weights=prc$n_votes)
  
  # waldtest(int_north_weights, ~`district_preference:south`-`district_preference:solid_north`)
  
  bivariate_north_sg <- stargazer(int_north,int_north_weights,
                                  notes.append = FALSE,notes.label = "",
                                  notes="\\parbox[t]{0.675\\textwidth}{\\footnotesize \\textit{Note}: Entries are linear regression coefficients with 
                                  robust standard errors in parentheses. `Solid North` is an indicator for whether 70\\% or more of a state's state legislators
                                  were of one party during the relevant time period. Weighted models are weighted according to the number of votes 
                                  cast by each district's representatives. $^{*}$p$<$0.05 (two-tailed test).}",
                                  add.lines = list(c("Weights","","\\checkmark","","\\checkmark")),
                                  keep.stat = c("adj.rsq","n"),no.space = T,
                                  star.char=c("*"),star.cutoffs = c(0.05),digits=3,digits.extra=0,
                                  table.placement = "!ht",
                                  dep.var.labels = "Share Votes Pro-Prohibition",
                                  covariate.labels = c("District Preference","South","`Solid North'","District Preference $\\times$ South",
                                                       "District Preference $\\times$ `Solid North'",
                                                       "Constant"),
                                  label="sn_cong",
                                  table.layout ="-ld-#-t-as-n",
                                  title="Congressional Roll Call Voting on Prohibition, `Solid North'")
  
  cat(bivariate_north_sg, sep = '\n', file = "./results/region_north_bivariate.tex")
  
# Aggregate Score Regressions, no local option
  
  int_nlo   <- felm(mean_pro_choice~dp_nlo*south|0|0|rowid,data=prc)
  
  int_weights_nlo <- felm(mean_pro_choice~dp_nlo*south|0|0|rowid,data=prc,weights=prc$n_votes)
  
  int_matched_nlo   <- felm(temp_outcome~dp_nlo*south|0|0|rowid,data=matched_prc_nlo)
  
  int_weights_matched_nlo <- felm(temp_outcome~dp_nlo*south|0|0|rowid,data=matched_prc_nlo,weights=matched_prc_nlo$n_votes)
  
  bivariate_nlo_sg <- stargazer(int_nlo,int_weights_nlo,int_matched_nlo,int_weights_matched_nlo,
                                notes.append = FALSE,notes.label = "",
                                notes="\\parbox[t]{0.75\\textwidth}{\\footnotesize \\textit{Note}: Entries are linear regression coefficients with 
                                        robust standard errors in parentheses. Weighted models are weighted according to the number of votes 
                                        cast by each district's representatives. $^{*}$p$<$0.05 (two-tailed test).}",
                                add.lines = list(c("Weights","","\\checkmark","","\\checkmark")),
                                keep.stat = c("adj.rsq","n"),
                                star.char=c("*"),star.cutoffs = c(0.05),digits=3,digits.extra=0,
                                table.placement = "!ht",no.space=T,
                                dep.var.caption = "Share Votes Pro-Prohibition",
                                dep.var.labels = c("Unmatched","Matched"),
                                covariate.labels = c("District Preference","South","District Preference $\\times$ South","Constant"),
                                label="region_bivariate_nlo",
                                table.layout ="-ld-#-t-as-n",
                                title="Congressional Roll Call Voting on Prohibition, No Local Option")
  
  cat(bivariate_nlo_sg, sep = '\n', file = "./results/region_bivariate_nlo.tex")  
  
# Majority Models 
  
  int_maj   <- felm(mean_pro_choice~I(district_preference>=0.5)*south|0|0|rowid,data=prc)
  
  int_weights_maj <- felm(mean_pro_choice~I(district_preference>=0.5)*south|0|0|rowid,data=prc,
                          weights=prc$n_votes)
  
  int_matched_maj   <- felm(temp_outcome~I(district_preference>=0.5)*south|0|0|rowid,data=matched_prc)
  
  int_weights_matched_maj <- felm(temp_outcome~I(district_preference>=0.5)*south|0|0|rowid,data=matched_prc,
                                  weights=matched_prc$n_votes)
  
  bivariate_maj_sg <- stargazer(int_maj,int_weights_maj,int_matched_maj,int_weights_matched_maj,
                                notes.append = FALSE,notes.label = "",
                                notes="\\parbox[t]{0.825\\textwidth}{\\footnotesize \\textit{Note}: Entries are linear regression coefficients with 
                                robust standard errors in parentheses. Weighted models are weighted according to the number of votes 
                                cast by each district's representatives. $^{*}$p$<$0.05 (two-tailed test).}",
                                add.lines = list(c("Weights","","\\checkmark","","\\checkmark")),
                                keep.stat = c("adj.rsq","n"),
                                star.char=c("*"),star.cutoffs = c(0.05),digits=3,digits.extra=0,
                                table.placement = "!ht",
                                no.space = T,
                                dep.var.caption = "Share Votes Pro-Prohibition",
                                dep.var.labels = c("Unmatched","Matched"),
                                covariate.labels = c("District Maj. for Prohibition","South","District Maj. for Prohibition $\\times$ South","Constant"),
                                label="region_bivariate_maj",
                                table.layout ="-ld-#-t-as-n",
                                title="Majority Preferences and Congressional Roll Call Voting on Prohibition")
  
  cat(bivariate_maj_sg, sep = '\n', file = "./results/region_bivariate_maj.tex")
  
# lopping off the middle
  
  lop_sequence <- seq(from=0,to=0.3,by=0.025)
  
  lop_coef <- list()
  
  for(j in 1:length(lop_sequence)){
    
    loop_weight_reg <- felm(mean_pro_choice~district_preference*south|0|0|rowid,
                            data=prc[abs(prc$district_preference-0.5)>lop_sequence[j],])
    
    lop_coef[[j]] <- summary(loop_weight_reg)$coefficients["district_preference:south",]
    
  }
  
  lop_coef <- as.data.frame(cbind(lop_sequence,do.call(rbind,lop_coef)))
  
  cairo_ps("./results/pref_lopped_estimates.eps",width=6,height=3.5,fallback_resolution = 1200)
  lopped_plot <- ggplot(lop_coef,aes(x=lop_sequence,y=Estimate))+
    geom_point(size=2)+
    geom_errorbar(aes(ymin=Estimate-1.96*`Cluster s.e.`,ymax=Estimate+1.96*`Cluster s.e.`),width=0.015)+
    geom_hline(yintercept=0,linetype=2,colour="indianred",size=2)+
    xlab("Lopped Distance")+
    ylab("Coefficient on 'District Preference X South'")+
    theme_minimal()
  print(lopped_plot)
  dev.off()
  
# terciles
  
  int_terciles   <- felm(mean_pro_choice~terciles*south-1-south|0|0|rowid,data=prc)
  
  int_weights_terciles <- felm(mean_pro_choice~terciles*south-1-south|0|0|rowid,data=prc,weights=prc$n_votes)
  
  int_matched_terciles   <- felm(temp_outcome~terciles*south-1-south|0|0|rowid,data=matched_prc)
  
  int_weights_matched_terciles <- felm(temp_outcome~terciles*south-1-south|0|0|rowid,data=matched_prc,weights=matched_prc$n_votes)
  
  bivariate_tercile_sg <- stargazer(int_terciles,int_weights_terciles,int_matched_terciles,int_weights_matched_terciles,
                            notes.append = FALSE,notes.label = "",
                            notes=paste("\\parbox[t]{0.7\\textwidth}{\\footnotesize \\textit{Note}: Entries are linear regression coefficients with 
                                    robust standard errors in parentheses. Models estimated without omitted category. Weighted models are weighted according to the number of votes 
                                    cast by each district's representatives. Continuous measure of district preference has been binned into 
                                    terciles based on the full sample, pooled across South and non-South. First tercile is between ", round(quantile(prc$district_preference,0)[1],2),
                                        " and ", round(quantile(prc$district_preference,0.3333333)[1],2),", the second is between ", round(quantile(prc$district_preference,0.33333333)[1],2), " and ", 
                                        round(quantile(prc$district_preference,0.666666666)[1],2),", and the third between ", round(quantile(prc$district_preference,0.3333333)[1],2), " and ",
                                        round(quantile(prc$district_preference,1)[1],2),". ``Joint Test $p$'' is the $p$ value for a joint $F$ test for the hypothesis that ``South $\\times$ Tercile One,'' ``South $\\times$ Tercile Two,''
                                        and ``South $\\times$ Tercile Three'' are each zero. $^{*}p<$0.05 (two-tailed test).}",sep=""),
                                        add.lines = list(c("Weights","","\\checkmark","","\\checkmark"),
                                             c("Joint Test $p$",
                                               round(waldtest(int_terciles,~`terciles1:south`|`terciles2:south`|`terciles3:south`)["p.F"],2),
                                               round(waldtest(int_weights_terciles,~`terciles1:south`|`terciles2:south`|`terciles3:south`)["p.F"],2),
                                               round(waldtest(int_matched_terciles,~`terciles1:south`|`terciles2:south`|`terciles3:south`)["p.F"],2),
                                               round(waldtest(int_weights_matched_terciles,~`terciles1:south`|`terciles2:south`|`terciles3:south`)["p.F"],2))),
                            keep.stat = c("adj.rsq","n"),
                            star.char=c("*"),star.cutoffs = c(0.05),digits=3,digits.extra=0,no.space = T,
                            table.placement = "!ht",
                            dep.var.caption = "Share Votes Pro-Prohibition",
                            dep.var.labels = c("Unmatched","Matched"),
                            covariate.labels = c("Tercile One","Tercile Two","Tercile Three",
                                                  "South $\\times$ Tercile One","South $\\times$ Tercile Two","South $\\times$ Tercile Three"),
                            label="region_bivariate_tercile",
                            table.layout ="-ld-#-t-as-n",
                            title="Congressional Roll Call Voting on Prohibition, Binned Preferences")
  
  cat(bivariate_tercile_sg, sep = '\n', file = "./results/region_bivariate_tercile.tex")
  
  
# Aggregate Score Regressions, Multi-Congress Districts
  
  prc$cong1 <- str_sub(gsub("early","",fixed=T,gsub("late","",fixed=T,prc$distid)),-6,-5)
  prc$cong2 <- str_sub(gsub("early","",fixed=T,gsub("late","",fixed=T,prc$distid)),-3,-2)
  matched_prc$cong1 <- str_sub(gsub("early","",fixed=T,gsub("late","",fixed=T,matched_prc$distid)),-6,-5)
  matched_prc$cong2 <- str_sub(gsub("early","",fixed=T,gsub("late","",fixed=T,matched_prc$distid)),-3,-2)
  
  
  int_onecong   <- felm(mean_pro_choice~district_preference*south|0|0|rowid,data=prc[prc$cong1!=prc$cong2,])
  
  int_weights_onecong <- felm(mean_pro_choice~district_preference*south|0|0|rowid,data=prc[prc$cong1!=prc$cong2,],weights=prc$n_votes[prc$cong1!=prc$cong2])
  
  int_matched_onecong   <- felm(temp_outcome~district_preference*south|0|0|rowid,data=matched_prc[matched_prc$cong1!=matched_prc$cong2,])
  
  int_weights_matched_onecong <- felm(temp_outcome~district_preference*south|0|0|rowid,data=matched_prc[matched_prc$cong1!=matched_prc$cong2,],weights=matched_prc$n_votes[matched_prc$cong1!=matched_prc$cong2])
  
  bivariate_onecong_sg <- stargazer(int_onecong,int_weights_onecong,int_matched_onecong,int_weights_matched_onecong,
                                    notes.append = FALSE,notes.label = "",
                                    notes="\\parbox[t]{0.75\\textwidth}{\\footnotesize \\textit{Note}: Entries are linear regression coefficients with 
                                            robust standard errors in parentheses. Weighted models are weighted according to the number of votes 
                                            cast by each district's representatives. Matching is based on full sample. $^{*}$p$<$0.05 (two-tailed test).}",
                                    add.lines = list(c("Weights","","\\checkmark","","\\checkmark")),
                                    keep.stat = c("adj.rsq","n"),no.space=T,
                                    star.char=c("*"),star.cutoffs = c(0.05),digits=3,digits.extra=0,
                                    table.placement = "!ht",
                                    dep.var.caption = "Share Votes Pro-Prohibition",
                                    dep.var.labels = c("Unmatched","Matched"),
                                    covariate.labels = c("District Preference","South","District Preference $\\times$ South","Constant"),
                                    label="region_bivariate_onecong",
                                    table.layout ="-ld-#-t-as-n",
                                    title="Congressional Roll Call Voting on Prohibition, Multi-Congress Districts")
  
  cat(bivariate_onecong_sg, sep = '\n', file = "./results/region_bivariate_onecong.tex")
  
  
# Fixed Effects Regressions
  
  int_fe   <- felm(mean_pro_choice~district_preference*south-south|state|0|rowid,data=prc)
  
  int_fe_weights <- felm(mean_pro_choice~district_preference*south-south|state|0|rowid,data=prc,weights=prc$n_votes)
  
  int_fe_matched   <- felm(temp_outcome~district_preference*south-south|state|0|rowid,data=matched_prc)
  
  int_fe_weights_matched <- felm(temp_outcome~district_preference*south-south|state|0|rowid,data=matched_prc,weights=matched_prc$n_votes)
  
  bivariate_fe_sg <- stargazer(int_fe,int_fe_weights,int_fe_matched,int_fe_weights_matched,
                            notes.append = FALSE,notes.label = "",
                            notes="\\parbox[t]{0.75\\textwidth}{\\footnotesize \\textit{Note}: Entries are linear regression coefficients with 
                                    robust standard errors in parentheses. All models included state fixed effects. Weighted models are weighted according to the number of votes 
                                    cast by each district's representatives. $^{*}$p$<$0.05 (two-tailed test).}",
                            add.lines = list(c("Weights","","\\checkmark","","\\checkmark")),
                            keep.stat = c("adj.rsq","n"),no.space=T,
                            star.char=c("*"),star.cutoffs = c(0.05),digits=3,digits.extra=0,
                            table.placement = "!ht",
                            dep.var.caption = "Share Votes Pro-Prohibition",
                            dep.var.labels = c("Unmatched","Matched"),
                            covariate.labels = c("District Preference","District Preference $\\times$ South"),
                            label="region_bivariate_fe",
                            table.layout ="-ld-#-t-as-n",
                            title="Congressional Roll Call Voting on Prohibition, State Fixed Effects")
  
  cat(bivariate_fe_sg, sep = '\n', file = "./results/region_bivariate_fe.tex")  
  
####################################################################  
# Volstead Act #####################################################
####################################################################  
  
  long <- read.csv("./better_prohibition_rollcall_longdat_1910_timing.csv")
  long$Region <- as.factor(long$south)
  long <- long[long$at_large==0,]

  volstead <- long[long$vote_num=="66_v122",]
  volstead[volstead$choice==-1,"choice"] <- 0
  
  volstead <- volstead[is.element(volstead$party,c(100,200)),]
  
  volstead$dem <- ifelse(volstead$party==100,1,0)
  
  volstead$prohib_maj <- ifelse(volstead$prohib_score_mean>=0.5,1,0)
  
  volstead$rowid <- 1:nrow(volstead)
  
# cross-tab ########################################################  
  
  # south table
  
  south_tab <- as.data.frame.matrix(table(volstead$prohib_maj[volstead$south==1],volstead$choice[volstead$south==1]))
  colnames(south_tab) <- c("Nay","Yea"); rownames(south_tab) <- c("Majority Opposed","Majority In Favor")
    south_tab_chisq <- chisq.test(as.matrix(south_tab),correct = T)
  
  # nonsouth table
  
  nonsouth_tab <- as.data.frame.matrix(table(volstead$prohib_maj[volstead$south==0],volstead$choice[volstead$south==0]))
  colnames(nonsouth_tab) <- c("Nay","Yea"); rownames(nonsouth_tab) <- c("Majority Opposed","Majority In Favor")
    nonsouth_tab_chisq <- chisq.test(as.matrix(nonsouth_tab),correct = T)
    
    
  stargazer(south_tab,
            summary = F,
            title = "Preferences and Volstead Act Votes, South",
            notes=paste("\\footnotesize $\\chi^2$ = ",format(round(south_tab_chisq$statistic,2),2),
                       "  ($p =$ ",format(round(south_tab_chisq$p.value,3),nsmall=3),")",sep=""),
            out="./results/south_volstead.tex",
            float=F)
  
  stargazer(nonsouth_tab,
            summary = F,
            title = "Preferences and Volstead Act Votes, North",
            notes=paste("\\footnotesize $\\chi^2$ = ",format(round(nonsouth_tab_chisq$statistic,2),2),
                        "  ($p =$ ",format(round(nonsouth_tab_chisq$p.value,3),nsmall=3),")",sep=""),
            out="./results/nonsouth_volstead.tex",
            float=F)
  
  # south off-diagonal proportion
  
      1-sum(diag(as.matrix(prop.table(south_tab))))
      
  # non-south off-diagonal proportion    
      
      1-sum(diag(as.matrix(prop.table(nonsouth_tab))))
  
# OLS #####################################################
    
    volstead_int <- felm(choice~prohib_score_mean*south|0|0|rowid,data=volstead)
    
    volstead_int_party <- felm(choice~prohib_score_mean*south+dem|0|0|rowid,data=volstead)
    
    volstead_sg <- stargazer(volstead_int,volstead_int_party,
                             notes.append = FALSE,notes.label = "",
                             notes="\\parbox[t]{0.55\\textwidth}{\\footnotesize \\textit{Note}: Entries are linear regression coefficients with
                             robust standard errors in parentheses. $^{*}$p$<$0.05 (two-tailed test).}",
                              add.lines = list(c("Weights","No","Yes")),
                             keep.stat = c("adj.rsq","n"),
                             star.char=c("*"),star.cutoffs = c(0.05),digits=3,digits.extra=0,
                             table.placement = "!ht",no.space=T,
                             dep.var.labels = "Volstead Act Vote",
                             covariate.labels = c("District Preference","South","Democrat","District Preference $\\times$ South","Constant"),
                             label="volstead",
                             table.layout ="-ld-#-t-as-n",
                             title="Volstead Act Roll Call, Ordinary Least Squares")
    
    cat(volstead_sg, sep = '\n', file = "./results/volstead.tex")        
    
#####################################################
# Vote-Level Logit Analyses #########################
#####################################################
  
  # data
  
  prc_long10 <- read.csv("./better_prohibition_rollcall_longdat_1910_timing.csv")
  prc_long10$era <- "old"; prc_long10$era_num <- 1
  prc_long10$Region <- as.factor(prc_long10$south)
  prc_long30 <- read.csv("./better_prohibition_rollcall_longdat_1930_timing.csv")
  prc_long30$era <- "new"; prc_long30$era_num <- 2
  prc_long30$Region <- as.factor(prc_long30$south)

  prc_long   <- rbind(prc_long10,prc_long30)
  prc_long$distid <- paste(prc_long$distid,prc_long$era,sep="_")
  prc_long$state <- as.character(prc_long$state)
  
  prc_long <- prc_long[prc_long$at_large==0,]

  # matched data

  list_binder_long <- list()

  for(j in 1:length(included_mix_dists)){

    list_binder_long[[j]] <- prc_long[prc_long$distid==included_mix_dists[j],]

  }

  matched_prc_long <- do.call(rbind,list_binder_long)
  matched_prc_long$temp_outcome <- matched_prc_long$pro_choice
  
  prc_long$temp_outcome <- prc_long$pro_choice

# logit analysis
  
  # logits
  
  logit <- glm(pro_choice~prohib_score_mean*south+factor(vote_num),family=binomial,data=prc_long)
  logit_vcov <- cluster.vcov(logit,cbind(prc_long$vote_num,prc_long$distid))
  
  logit_match <- glm(pro_choice~prohib_score_mean*south+factor(vote_num),family=binomial,data=matched_prc_long)
  logit_match_vcov <- cluster.vcov(logit_match,cbind(matched_prc_long$distid))
  
  logit_maj <- glm(pro_choice~I(prohib_score_mean>=0.5)*south+factor(vote_num),family=binomial,data=prc_long)
  logit_maj_vcov <- cluster.vcov(logit_maj,cbind(prc_long$vote_num,prc_long$distid))
  
  logit_maj_match <- glm(pro_choice~I(prohib_score_mean>=0.5)*south+factor(vote_num),family=binomial,data=matched_prc_long)
  logit_maj_match_vcov <- cluster.vcov(logit_maj_match,cbind(matched_prc_long$distid))
  
  # linear probability model
  
  ols <- felm(temp_outcome~prohib_score_mean*south|vote_num|0|distid,data=prc_long)

  match_ols <- felm(temp_outcome~prohib_score_mean*south|vote_num|0|distid,data=matched_prc_long)
  
  ols_maj <- felm(temp_outcome~I(prohib_score_mean>=0.5)*south|vote_num|0|distid,data=prc_long)
  
  match_maj_ols <- felm(temp_outcome~I(prohib_score_mean>=0.5)*south|vote_num|0|distid,data=matched_prc_long)
  
  logit_sg <- stargazer(ols,match_ols,ols_maj,match_maj_ols,logit,logit_match,logit_maj,logit_maj_match,
                        se=list(sqrt(diag(ols$clustervcv)),sqrt(diag(match_ols$clustervcv)),
                                sqrt(diag(ols_maj$clustervcv)),sqrt(diag(match_maj_ols$clustervcv)),
                                sqrt(diag(logit_vcov)),sqrt(diag(logit_match_vcov)),
                                sqrt(diag(logit_maj_vcov)),sqrt(diag(logit_maj_match_vcov))),
                        notes.append = FALSE,notes.label = "",column.sep.width="0pt",
                        notes="\\parbox[t]{0.995\\textwidth}{\\footnotesize \\textit{Note}: Entries are coefficients with 
                        district-clustered standard errors in parentheses. Models 1-4 are linear probability model coefficients, and Models 5-8 are logit coefficients.
                        All models include roll call-specific indicator variables.
                        $^{*}$p$<$0.05 (two-tailed test).}",
                        add.lines = list(c("Matched Sample","","\\checkmark","","\\checkmark","","\\checkmark","","\\checkmark")),
                        keep.stat = c("n"),font.size = "footnotesize",
                        star.char=c("*"),star.cutoffs = c(0.05),digits=3,digits.extra=0,no.space=T,
                        table.placement = "!ht",
                        dep.var.caption = "Vote Pro-Prohibition",
                        dep.var.labels = c("Linear Probability Model","Logit"),
                        keep=c("prohib_score_mean","south","prohib_score_mean:south","I(prohib_score_mean>=0.5)","I(prohib_score_mean>=0.5):south"),
                        covariate.labels = c("District Preference",
                                             "District Maj. for Prohibition",
                                             "South",
                                             "Preference $\\times$ South",
                                             "Maj. for Prohibition $\\times$ South"),
                        label="logit_cong",
                        table.layout ="-ld-#-t-as-n",
                        title="Congressional Roll Call Voting, Vote-Level Analyses")
  
  cat(logit_sg, sep = '\n', file = "./results/logit_cong.tex")

##############################################  
# assessing variation across matched samples #
##############################################
  
  # now we create a matched dataset for each of the above 
  
  set.seed(53074)  
  coef_loop <- list()
  
  # main data #
  
  for(i in 1:1000){
    
    prc_matcher <- prc  # create duplicate of main data
    
    # mahalanobis distance matching
    
    south_match_out <- Match(Tr=prc_matcher$south,
                             X=cbind(prc_matcher$district_preference,prc_matcher$era_num),
                             exact=c(F,T),
                             caliper=0.1,Weight=2,
                             distance.tolerance = 1e-100000000,
                             ties=F)
    
    # extract matched districts
    
    included_dists <- prc_matcher$distid[c(south_match_out$index.control,south_match_out$index.treated)]
    
    # now loop over the included districts
    # we do this because there are potential duplicates in the matched districts, and we want to include them 
    # the correct number of times
    
    list_binder <- list()
    
    for(j in 1:length(included_dists)){
      
      list_binder[[j]] <- prc[prc$distid==included_dists[j],]
      
    }
    
    matched_prc <- do.call(rbind,list_binder)
    matched_prc$temp_outcome <- matched_prc$mean_pro_choice
    
    int_matched_loop <- felm(temp_outcome~district_preference*south|0|0|rowid,data=matched_prc)
    int_matched_weights_loop <- felm(temp_outcome~district_preference*south|0|0|rowid,data=matched_prc,
                                     weights=matched_prc$n_votes)
    
    coef_loop[[i]] <- c(coef(int_matched_loop)["district_preference:south"],
                        coef(int_matched_weights_loop)["district_preference:south"])
  
  }
  
  coef_loop <- as.data.frame(do.call(rbind,coef_loop)); colnames(coef_loop) <- c("Unweighted","Weighted")
  
  coef_loop <- coef_loop %>% gather(Type,value)
  
  cairo_ps("./results/matching_iterations.eps",width=6,height=3.5,fallback_resolution = 1200)
  print(ggplot(coef_loop,aes(x=value,fill=Type))+
          geom_histogram(position="identity",bins=15,alpha=0.75)+
          xlab("Coefficient on `District Preference X South` across Matched Samples")+
          ylab("Count")+
          theme_minimal()+
          geom_vline(xintercept=0,size=2,linetype=2)+
          scale_fill_grey(start=0.6,end=0.3)+
          theme(axis.text=element_text(size=12,face="bold")))
  dev.off()
  
